In [3]:

    
from matplotlib import pyplot as plt
%matplotlib inline

import pandas as pd
import numpy as np

from datetime import datetime, timedelta

Consumption data



In [ ]:



In [2]:

    
kwhs = pd.read_csv("../../prop39schools/sample_data/15634040000000_2012-2013_SCE_ELECTRIC_20150722.xml_INTERVAL.csv", header=None)



In [3]:

    
kwhs.rename(columns={0:"id", 1:"date"}, inplace=True)

# date is in Unix timestamp format, convert to ISO format
kwhs['date'] = kwhs['date'].apply(lambda x: datetime.fromtimestamp(int(x)).strftime('%Y-%m-%d'))
agreement_id = kwhs['id'].iloc[0]
kwhs.drop("id", axis=1, inplace=True)

kwhs.set_index(["date"], inplace=True)



In [4]:

    
kwhs.head()









    Out[4]:






  
    
      
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      ...
      192
      193
      194
      195
      196
      197
      198
      199
      200
      201
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2012-06-30
      0.72
      4.32
      3.06
      0.66
      0.60
      0.66
      0.66
      0.72
      0.72
      0.60
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
    
      2012-07-01
      0.66
      0.72
      0.66
      0.66
      0.66
      0.72
      0.66
      0.66
      0.72
      0.66
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
    
      2012-07-02
      0.72
      0.72
      0.66
      0.66
      0.66
      0.66
      0.66
      0.66
      0.72
      0.72
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
    
      2012-07-03
      0.72
      0.66
      0.66
      0.72
      0.66
      0.78
      0.66
      0.72
      0.66
      0.78
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
    
      2012-07-04
      0.66
      0.72
      0.66
      0.72
      0.72
      0.78
      0.66
      0.72
      0.66
      0.66
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
  

5 rows × 200 columns



In [5]:

    
kwhs.shape









    Out[5]:





(1428, 200)



In [6]:

    
(kwhs.sum(0)>0).sum()









    Out[6]:





98



In [7]:

    
(kwhs.count()>0).sum()-2









    Out[7]:





194



In [8]:

    
kwhs.ix['2013-06-03']









    Out[8]:






  
    
      
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      ...
      192
      193
      194
      195
      196
      197
      198
      199
      200
      201
    
    
      date
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2013-06-03
      0.6000
      0.660
      0.660
      0.600
      0.6
      0.660
      0.72
      0.600
      0.660
      0.600
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
    
      2013-06-03
      4.0000
      3.200
      3.600
      4.000
      3.6
      3.600
      3.60
      4.000
      3.600
      3.600
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
    
      2013-06-03
      1.9981
      1.926
      1.818
      2.034
      1.8
      1.818
      1.80
      2.034
      1.944
      1.746
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
    
      2013-06-03
      1.1700
      1.140
      1.110
      1.210
      1.2
      1.320
      1.40
      1.410
      1.410
      1.180
      ...
      0
      0
      0
      0
      0
      0
      NaN
      NaN
      NaN
      NaN
    
  

4 rows × 200 columns



In [9]:

    
# It looks like this is 15-minute interval data?

plt.figure(figsize=(8,4))
plt.plot(kwhs.max().values, label="max", lw=2)
plt.plot(kwhs.std().values, label="std", lw=2)
plt.plot(kwhs.mean().values, label="avg", lw=2)

idx = np.random.choice(kwhs.index.values, 5)
for i in idx:
    plt.plot(kwhs.ix[i].values[0], label=i)
    
plt.ylabel("kWh")
plt.legend()









    Out[9]:





<matplotlib.legend.Legend at 0x7c61a90>



In [10]:

    
idx









    Out[10]:





array(['2012-07-26', '2012-12-25', '2012-12-24', '2012-12-31', '2012-09-18'], dtype=object)



In [11]:

    
plt.figure(figsize=(8,6))
plt.imshow(kwhs.iloc[:,:96].values, interpolation='nearest', aspect='auto')









    Out[11]:





<matplotlib.image.AxesImage at 0x7fecd30>

Billing data



In [12]:

    
bill = pd.read_csv("../../prop39schools/sample_data/15634040000000_2012-2013_SCE_ELECTRIC_20150722.xml_BILL.csv")
bill.iloc[1]









    Out[12]:





customer_name                            NaN
customer_city                            NaN
customer_zip                             NaN
customer_account                         NaN
lea_customer         Delano Union Elementary
cds_code                      15634040000000
school_site_name     Delano Union Elementary
school_city                           Delano
school_site_zip                   93215-2416
agreement                            1183021
units                                    kWh
demandUnits                               kW
rate_schedule_id                  GS-2-TOU-A
n_days                                    29
generation                             False
start_time                        1343088000
last_period                          2296.88
consumption_total                       7714
on_peak                                  NaN
semi_peak                                NaN
off_peak                                 NaN
on_peak_demand                         36.48
semi_peak_demand                       31.68
off_peak_demand                        27.84
Name: 1, dtype: object



In [13]:

    
bill.shape









    Out[13]:





(69, 24)



In [14]:

    
bill['start_time'] = bill['start_time'].apply(lambda x: datetime.fromtimestamp(int(x)).strftime('%Y-%m-%d'))

zipcode = bill['school_site_zip'].iloc[0]



In [15]:

    
bill['off_peak_demand'].hist(bins=20)









    Out[15]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fa8278>



In [16]:

    
bill['on_peak_demand'].hist(bins=20)









    Out[16]:





<matplotlib.axes._subplots.AxesSubplot at 0x9ef9588>



In [17]:

    
bill.head()









    Out[17]:






  
    
      
      customer_name
      customer_city
      customer_zip
      customer_account
      lea_customer
      cds_code
      school_site_name
      school_city
      school_site_zip
      agreement
      ...
      generation
      start_time
      last_period
      consumption_total
      on_peak
      semi_peak
      off_peak
      on_peak_demand
      semi_peak_demand
      off_peak_demand
    
  
  
    
      0
      NaN
      NaN
      NaN
      NaN
      Delano Union Elementary
      15634040000000
      Delano Union Elementary
      Delano
      93215-2416
      1183021
      ...
      False
      2012-06-20
      2144.97
      7544
      NaN
      NaN
      NaN
      33.36
      30.96
      24.48
    
    
      1
      NaN
      NaN
      NaN
      NaN
      Delano Union Elementary
      15634040000000
      Delano Union Elementary
      Delano
      93215-2416
      1183021
      ...
      False
      2012-07-23
      2296.88
      7714
      NaN
      NaN
      NaN
      36.48
      31.68
      27.84
    
    
      2
      NaN
      NaN
      NaN
      NaN
      Delano Union Elementary
      15634040000000
      Delano Union Elementary
      Delano
      93215-2416
      1183021
      ...
      False
      2012-08-21
      1933.79
      6498
      NaN
      NaN
      NaN
      31.92
      26.16
      22.32
    
    
      3
      NaN
      NaN
      NaN
      NaN
      Delano Union Elementary
      15634040000000
      Delano Union Elementary
      Delano
      93215-2416
      1183021
      ...
      False
      2012-09-20
      1190.14
      5721
      NaN
      NaN
      NaN
      27.84
      30.48
      19.68
    
    
      4
      NaN
      NaN
      NaN
      NaN
      Delano Union Elementary
      15634040000000
      Delano Union Elementary
      Delano
      93215-2416
      1183021
      ...
      False
      2012-10-23
      757.61
      4727
      0
      2964
      1763
      0.00
      22.08
      14.64
    
  

5 rows × 24 columns



In [18]:

    
bill['school_site_name'].value_counts()









    Out[18]:





Delano Union Elementary    69
Name: school_site_name, dtype: int64



In [ ]:



In [ ]:

	2	3	4	5	6	7	8	9	10	11	...	192	193	194	195	196	197	198	199	200	201
date
2012-06-30	0.72	4.32	3.06	0.66	0.60	0.66	0.66	0.72	0.72	0.60	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN
2012-07-01	0.66	0.72	0.66	0.66	0.66	0.72	0.66	0.66	0.72	0.66	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN
2012-07-02	0.72	0.72	0.66	0.66	0.66	0.66	0.66	0.66	0.72	0.72	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN
2012-07-03	0.72	0.66	0.66	0.72	0.66	0.78	0.66	0.72	0.66	0.78	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN
2012-07-04	0.66	0.72	0.66	0.72	0.72	0.78	0.66	0.72	0.66	0.66	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN

	2	3	4	5	6	7	8	9	10	11	...	192	193	194	195	196	197	198	199	200	201
date
2013-06-03	0.6000	0.660	0.660	0.600	0.6	0.660	0.72	0.600	0.660	0.600	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN
2013-06-03	4.0000	3.200	3.600	4.000	3.6	3.600	3.60	4.000	3.600	3.600	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN
2013-06-03	1.9981	1.926	1.818	2.034	1.8	1.818	1.80	2.034	1.944	1.746	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN
2013-06-03	1.1700	1.140	1.110	1.210	1.2	1.320	1.40	1.410	1.410	1.180	...	0	0	0	0	0	0	NaN	NaN	NaN	NaN

	customer_name	customer_city	customer_zip	customer_account	lea_customer	cds_code	school_site_name	school_city	school_site_zip	agreement	...	generation	start_time	last_period	consumption_total	on_peak	semi_peak	off_peak	on_peak_demand	semi_peak_demand	off_peak_demand
0	NaN	NaN	NaN	NaN	Delano Union Elementary	15634040000000	Delano Union Elementary	Delano	93215-2416	1183021	...	False	2012-06-20	2144.97	7544	NaN	NaN	NaN	33.36	30.96	24.48
1	NaN	NaN	NaN	NaN	Delano Union Elementary	15634040000000	Delano Union Elementary	Delano	93215-2416	1183021	...	False	2012-07-23	2296.88	7714	NaN	NaN	NaN	36.48	31.68	27.84
2	NaN	NaN	NaN	NaN	Delano Union Elementary	15634040000000	Delano Union Elementary	Delano	93215-2416	1183021	...	False	2012-08-21	1933.79	6498	NaN	NaN	NaN	31.92	26.16	22.32
3	NaN	NaN	NaN	NaN	Delano Union Elementary	15634040000000	Delano Union Elementary	Delano	93215-2416	1183021	...	False	2012-09-20	1190.14	5721	NaN	NaN	NaN	27.84	30.48	19.68
4	NaN	NaN	NaN	NaN	Delano Union Elementary	15634040000000	Delano Union Elementary	Delano	93215-2416	1183021	...	False	2012-10-23	757.61	4727	0	2964	1763	0.00	22.08	14.64